Load required packages
library(monocle)
library(igraph)
library(ComplexHeatmap)
library(Matrix)
library(tidyverse)
library(magrittr)
library(scales)
library(gridExtra)
library(extrafont)
library(ggrepel)
Read in expression matrix
data_directory <- "../../data/10x/"
expr_readcount_use <-
sparseMatrix(i = c(readRDS(file.path(data_directory, "expr_readcount_raw_csc_indices_part1.rds")),
readRDS(file.path(data_directory, "expr_readcount_raw_csc_indices_part2.rds"))),
p = readRDS(file.path(data_directory, "expr_readcount_raw_csc_indptr.rds")),
x = readRDS(file.path(data_directory, "expr_readcount_raw_csc_values.rds")),
dims = readRDS(file.path(data_directory, "expr_readcount_raw_csc_shape.rds")),
dimnames = readRDS(file.path(data_directory, "expr_readcount_raw_csc_dimnames.rds")),
index1 = FALSE)
dim(expr_readcount_use)
## [1] 27999 34564
Load pseudotime reconstruction result
data_directory <- file.path(data_directory,
"epicardial_3f_d7")
cell_dataset <-
readRDS(file = file.path(data_directory,
"cell_dataset_lowerDetectionLimit0.5_DDRTree_dim2_reverse.rds"))
Load gene info
gene_symbols <- read.table(file.path("../..",
"data",
"misc",
"genes.tsv"),
header = FALSE,
row.names = 1,
stringsAsFactors = FALSE)
gene_symbols <- setNames(gene_symbols[[1]], rownames(gene_symbols))
head(gene_symbols)
## ENSMUSG00000051951 ENSMUSG00000089699 ENSMUSG00000102343
## "Xkr4" "Gm1992" "Gm37381"
## ENSMUSG00000025900 ENSMUSG00000109048 ENSMUSG00000025902
## "Rp1" "Rp1" "Sox17"
Load functions
analyze_gene_enrichment <- function(expr,
cell_group_a,
cell_group_b,
gene_symbols = NULL,
expr_cpm = NULL) {
m <- Matrix::rowSums(expr[, cell_group_b] > 0)
m1 <- m
m1[m == 0] <- 1
n <- Matrix::rowSums(expr[, cell_group_a] > 0)
pv1 <- pbinom(n, length(cell_group_a), m1 / length(cell_group_b),
lower.tail = FALSE) + dbinom(n, length(cell_group_a),
m1 / length(cell_group_b))
expressed_ratio_log_fold <-
log(n * length(cell_group_b) / (m * length(cell_group_a)), base = 2)
d1 <- data.frame(log2.effect = expressed_ratio_log_fold,
pval = pv1)
d1$pos.frac.a <- Matrix::rowMeans(expr[, cell_group_a] > 0)
d1$pos.frac.b <- Matrix::rowMeans(expr[, cell_group_b] > 0)
if (! is.null(gene_symbols)) {
d1$symbol <- gene_symbols[rownames(d1)]
}
if (! is.null(expr_cpm)) {
d1$mean.cpm.a <- Matrix::rowMeans(expr_cpm[rownames(d1), cell_group_a])
d1$mean.cpm.b <- Matrix::rowMeans(expr_cpm[rownames(d1), cell_group_b])
}
d1$p.adj <- p.adjust(d1$pval, method = "BH")
return(d1)
}
extract_monocle_result <- function(cell_dataset,
x = 1,
y = 2) {
reduced_dim_coords <- monocle::reducedDimK(cell_dataset)
ica_space_df <- Matrix::t(reduced_dim_coords) %>% as.data.frame %>%
select(prin_graph_dim_1 = x,
prin_graph_dim_2 = y) %>%
mutate(sample_name = rownames(.), sample_state = rownames(.))
mst_branch_nodes <-
cell_dataset@auxOrderingData[[
cell_dataset@dim_reduce_type]]$branch_points
branch_point_df <- ica_space_df %>%
slice(match(mst_branch_nodes, sample_name)) %>%
mutate(branch_point_idx = seq_len(n()))
dp_mst <- monocle::minSpanningTree(cell_dataset)
edge_df <-
dp_mst %>% igraph::as_data_frame %>%
select(source = "from", target = "to") %>%
left_join(ica_space_df %>%
select(source = "sample_name",
source_prin_graph_dim_1 = "prin_graph_dim_1",
source_prin_graph_dim_2 = "prin_graph_dim_2"),
by = "source") %>%
left_join(ica_space_df %>%
select(target = "sample_name",
target_prin_graph_dim_1 = "prin_graph_dim_1",
target_prin_graph_dim_2 = "prin_graph_dim_2"),
by = "target")
sample_state <- pData(cell_dataset)$State
lib_info_with_pseudo <- pData(cell_dataset)
data_df <- t(monocle::reducedDimS(cell_dataset)) %>%
as.data.frame %>%
select(data_dim_1 = x, data_dim_2 = y) %>%
rownames_to_column("sample_name") %>%
mutate(sample_state) %>%
left_join(lib_info_with_pseudo %>% rownames_to_column("sample_name"),
by = "sample_name")
return(setNames(list(data_df = data_df,
edge_df = edge_df,
branch_point_df = branch_point_df),
c("data", "edge", "brach_point")))
}
plot_pseudotime_batch <- function(batch,
text,
monocle_out) {
text_use <- text
p <-
ggplot(data = subset(monocle_out[[1]], ! category %in% batch),
aes(x = data_dim_1,
y = data_dim_2)) +
geom_segment(aes_string(x = "source_prin_graph_dim_1",
y = "source_prin_graph_dim_2",
xend = "target_prin_graph_dim_1",
yend = "target_prin_graph_dim_2"),
size = .25,
linetype = "solid", na.rm = TRUE,
color = "#2196F3",
data = monocle_out[[2]]) +
geom_point(size = .3, stroke = 0, shape = 16, alpha = 1,
color = "grey70") +
geom_point(data = subset(monocle_out[[1]],
category %in% batch),
color = "#FF5722",
size = .3, stroke = 0, shape = 16, alpha = 1) +
labs(x = "Dimension 1",
y = "Dimension 2",
color = NULL) +
theme_void() +
theme(legend.justification = c(0, 1),
# legend.position = c(.15, .4),
legend.position = c(.19, .4),
legend.text = element_text(family = "Arial",
size = 4,
margin = margin(t = 0, r = 0,
b = 0, l = -1.8,
unit = "mm")),
legend.key.size = unit(1.5, "mm"),
legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")) +
annotate("text",
x = -6,
y = 4.5,
family = "Arial",
label = text_use,
color = "black",
size = 2.5,
vjust = "inward", hjust = "inward")
return(p)
}
plot_expr_pseudotime_batch <- function(gene,
batch,
text,
expr_cpm,
monocle_out) {
gene_use <- gene
text_use <- text
monocle_out[[1]]$expr <-
log10(expr_cpm[gene_use, monocle_out[[1]]$sample_name] + 1)
p <-
ggplot(data = subset(monocle_out[[1]], ! category %in% batch),
aes(x = data_dim_1,
y = data_dim_2)) +
geom_segment(aes_string(x = "source_prin_graph_dim_1",
y = "source_prin_graph_dim_2",
xend = "target_prin_graph_dim_1",
yend = "target_prin_graph_dim_2"),
size = .25,
linetype = "solid", na.rm = TRUE,
color = "#2196F3",
data = monocle_out[[2]]) +
geom_point(size = .3, stroke = 0, shape = 16, alpha = 1,
color = "grey70") +
geom_point(data = subset(monocle_out[[1]],
category %in% batch)[order(subset(monocle_out[[1]],
category %in% batch)$expr),],
aes(color = expr),
size = .3, stroke = 0, shape = 16, alpha = 1) +
scale_color_viridis_c(name = NULL) +
theme_void() +
theme(legend.justification = c(0, 1),
# legend.position = c(.15, .4),
legend.position = c(.19, .4),
legend.text = element_text(family = "Arial",
size = 4,
margin = margin(t = 0, r = 0,
b = 0, l = -1.8,
unit = "mm")),
legend.key.size = unit(1.5, "mm"),
legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")) +
annotate("text",
x = -6,
y = 4.5,
family = "Arial",
label = text_use,
color = "black",
size = 2.5,
vjust = "inward", hjust = "inward")
return(p)
}
Load Monocle result and calculate CPM
monocle_out <- extract_monocle_result(cell_dataset)
expr_cpm_use <- expr_readcount_use
expr_cpm_use@x <- 1000000 *
(expr_cpm_use@x / rep.int(Matrix::colSums(expr_cpm_use),
diff(expr_cpm_use@p)))
# generate cell state labels
state_labels <- monocle_out[[1]] %>%
group_by(State) %>%
summarise(x = mean(data_dim_1),
y = mean(data_dim_2)) %>% as.data.frame
p_pseudotime <-
ggplot(data = subset(monocle_out[[1]]),
aes(x = data_dim_1,
y = data_dim_2)) +
geom_segment(aes_string(x = "source_prin_graph_dim_1",
y = "source_prin_graph_dim_2",
xend = "target_prin_graph_dim_1",
yend = "target_prin_graph_dim_2"),
size = .25,
linetype = "solid", na.rm = TRUE,
color = "#2196F3",
data = monocle_out[[2]]) +
geom_point(aes(color = Pseudotime),
size = .3, stroke = 0, shape = 16, alpha = .2,
data = monocle_out[[1]]) +
scale_color_viridis_c(name = NULL) +
theme_void() +
theme(legend.justification = c(0, 1),
legend.position = c(.19, .4),
legend.text = element_text(family = "Arial",
size = 4,
margin = margin(t = 0, r = 0,
b = 0, l = -1.8,
unit = "mm")),
legend.key.size = unit(1.5, "mm"),
legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")) +
annotate("text",
x = -6,
y = 4.5,
family = "Arial",
label = "Pseudotime",
color = "black",
size = 2.5,
vjust = "inward", hjust = "inward")
# add state labels
p_pseudotime <- p_pseudotime +
geom_point(data = state_labels,
aes(x, y),
size = 2.0,
stroke = 0,
shape = 22,
fill = "#E29C36",
color = "#E29C36") +
annotate("text",
x = state_labels$x,
y = state_labels$y,
label = LETTERS[as.integer(state_labels$State)],
size = 1.5,
family = "Arial",
color = "black")
p_pseudotime_bl19 <- plot_pseudotime_batch(batch = "BL19",
text = "3F, D7\n",
monocle_out = monocle_out)
p_pseudotime_primary <- plot_pseudotime_batch(batch = c("BL5", "BL6"),
text = "Primary\n",
monocle_out = monocle_out)
p_pseudotime_uninfected <- plot_pseudotime_batch(batch = "BL7",
text = "Uninfected\n",
monocle_out = monocle_out)
p_combined <- grid.arrange(p_pseudotime,
p_pseudotime_primary,
p_pseudotime_uninfected,
p_pseudotime_bl19,
nrow = 2)
# get cell groups
cell_distribution_primary <-
pData(cell_dataset) %>%
filter(category %in% c("BL5", "BL6")) %>% group_by(State) %>%
summarise(primary.count = n())
cell_distribution_uninfected <-
pData(cell_dataset) %>%
filter(category %in% c("BL7")) %>% group_by(State) %>%
summarise(uninfected.count = n())
cell_distribution_10f_d7 <-
pData(cell_dataset) %>%
filter(category %in% c("BL8")) %>% group_by(State) %>%
summarise("10f_d7.count" = n())
cell_distribution_10f_d14 <-
pData(cell_dataset) %>%
filter(category %in% c("BL18")) %>% group_by(State) %>%
summarise("10f_d14.count" = n())
cell_distribution_3f_d7 <-
pData(cell_dataset) %>%
filter(category %in% c("BL19")) %>% group_by(State) %>%
summarise("3f_d7.count" = n())
# prepare data
pseudotime_state_composition <-
full_join(cell_distribution_primary, cell_distribution_uninfected,
by = "State") %>%
full_join(cell_distribution_10f_d7, by = "State") %>%
full_join(cell_distribution_10f_d14, by = "State") %>%
full_join(cell_distribution_3f_d7, by = "State") %>%
arrange(State) %>% replace_na(list(primary.count = 0)) %>%
select(- State) %>% `t`(.) %>% `/`(rowSums(.)) %>% `t` %>%
as.data.frame %>%
mutate(state = rownames(.)) %>%
gather(key = category,
value = ratio,
- state) %>%
mutate(category = factor(category,
levels = c("primary.count",
"3f_d7.count",
"10f_d14.count",
"10f_d7.count",
"uninfected.count")),
state2 = LETTERS[as.integer(state)])
pseudotime_state_composition <-
do.call(rbind.data.frame,
lapply(levels(pseudotime_state_composition$category),
function(x) {
df_state_composition_ratio <-
pseudotime_state_composition[
pseudotime_state_composition$category == x, ]
df_state_composition_ratio$cum.ratio <-
cumsum(df_state_composition_ratio$ratio)
df_state_composition_ratio$label.position <-
df_state_composition_ratio$cum.ratio -
df_state_composition_ratio$ratio / 2
df_state_composition_ratio
}))
# plot
ggplot(pseudotime_state_composition, aes(x = category,
y = ratio,
fill = state2)) +
geom_bar(stat = "identity") + coord_flip() +
scale_fill_manual(name = "State",
values = c("#8DD3C7", "#FFFFB3", "#BEBADA",
"#FB8072", "#80B1D3", "#FDB462",
"#B3DE69",
"#F1835C", "#D9D9D9")) +
scale_x_discrete(name = NULL,
labels = c("Epicardial,\nprimary",
"3F, D7",
"10F, D14",
"10F, D7",
"Uninfected")) +
scale_y_continuous(name = "Percentage",
labels = scales::percent) +
theme_bw() +
theme(axis.text = element_text(size = 6),
axis.title = element_text(size = 6),
axis.title.x = element_text(margin = margin(t = 0, r = 0,
b = 0, l = 0,
unit = "mm")),
legend.title = element_text(size = 6),
legend.text = element_text(size = 4,
margin = margin(t = 0, r = 0,
b = 0, l = -1,
unit = "mm")),
strip.text.x = element_text(size = 4),
strip.text.y = element_text(size = 6),
legend.key.size = unit(2.5, "mm"),
legend.position = "right",
# legend.box = "vertical",
legend.direction = "vertical",
legend.margin = margin(t = 0, r = 0, b = 0, l = -2, unit = "mm")) +
geom_text(data = subset(pseudotime_state_composition,
state2 == "H"),
aes(y = 1 - label.position,
label = paste0(round(subset(pseudotime_state_composition,
state2 == "H")$ratio, 4) * 100,
"%")), size = 2) +
guides(fill = guide_legend(ncol = 3, byrow = TRUE))
pseudotime_state_composition2 <-
do.call(rbind.data.frame,
lapply(levels(pData(cell_dataset)$State), function(x) {
composition <-
as.data.frame(table(pData(cell_dataset)[
pData(cell_dataset)$State == x, "category"]))
names(composition) <- c("category", "count")
composition$state <- x
composition <-
composition[match(composition$category,
(c("BL7", "BL8",
"BL18", "BL19", "BL5"))), ]
composition %<>%
mutate(percentage = count / sum(composition$count),
label = paste0(round(count / sum(composition$count),
3)
* 100, "%"),
label.position = cumsum(percentage) - percentage / 2)
return(composition)
})) %>% mutate(category = factor(category,
levels = rev(c("BL7", "BL8",
"BL18", "BL19",
"BL5"))))
facet_labels <- paste("State",
LETTERS[as.integer(unique(pseudotime_state_composition2$state))])
names(facet_labels) <- unique(pseudotime_state_composition2$state)
ggplot(pseudotime_state_composition2,
aes(x = "", y = percentage, fill = category)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start = 0) +
facet_wrap(~ state,
labeller = labeller(state = facet_labels)) +
geom_text(data = pseudotime_state_composition2,
aes(y = label.position,
label = pseudotime_state_composition2$label), size = 1.5) +
scale_fill_manual(name = NULL,
labels = c("Epicardial, primary",
"3F, D7",
"10F, D14",
"10F, D7",
"Uninfected"),
values = c("#e37b3e",
"#44b29c",
"#de5a49",
"#f7a8a5",
"#f0ca4e")) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
# panel.border = element_blank(),
panel.grid = element_blank(),
axis.ticks = element_blank(),
# text = element_text(size = 4),
legend.text = element_text(size = 4,
margin = margin(t = 0, r = 0,
b = 0, l = -1.5,
unit = "mm")),
strip.text.x = element_text(size = 4),
strip.text.y = element_text(size = 4),
legend.key.size = unit(2.5, "mm"),
legend.margin = margin(t = 0, r = 0, b = 0, l = -3, unit = "mm"))
# Wt1
grep("Wt1", gene_symbols, value = TRUE)
## ENSMUSG00000074987 ENSMUSG00000016458
## "Wt1os" "Wt1"
selected_gene <- "ENSMUSG00000016458"
# gene_symbols[selected_gene]
p_pseudotime_bl19_Wt1 <-
plot_expr_pseudotime_batch(gene = selected_gene,
batch = c("BL19"),
text = paste(gene_symbols[selected_gene],
"3F, Day 7", sep = "\n"),
expr_cpm = expr_cpm_use,
monocle_out = monocle_out)
grep("Bnc1", gene_symbols, value = TRUE)
## ENSMUSG00000025105
## "Bnc1"
selected_gene <- "ENSMUSG00000025105"
# gene_symbols[selected_gene]
p_pseudotime_bl19_Bnc1 <-
plot_expr_pseudotime_batch(gene = selected_gene,
batch = c("BL19"),
text = paste(gene_symbols[selected_gene],
"3F, Day 7", sep = "\n"),
expr_cpm = expr_cpm_use,
monocle_out = monocle_out)
grep("Myrf", gene_symbols, value = TRUE)
## ENSMUSG00000034057 ENSMUSG00000036098
## "Myrfl" "Myrf"
selected_gene <- "ENSMUSG00000036098"
# gene_symbols[selected_gene]
p_pseudotime_bl19_Myrf <-
plot_expr_pseudotime_batch(gene = selected_gene,
batch = c("BL19"),
text = paste(gene_symbols[selected_gene],
"3F, Day 7", sep = "\n"),
expr_cpm = expr_cpm_use,
monocle_out = monocle_out)
grep("Upk3b", gene_symbols, value = TRUE)
## ENSMUSG00000042985 ENSMUSG00000006143
## "Upk3b" "Upk3bl"
selected_gene <- "ENSMUSG00000042985"
# gene_symbols[selected_gene]
p_pseudotime_bl19_Upk3b <-
plot_expr_pseudotime_batch(gene = selected_gene,
batch = c("BL19"),
text = paste(gene_symbols[selected_gene],
"3F, Day 7", sep = "\n"),
expr_cpm = expr_cpm_use,
monocle_out = monocle_out)
p_combined <- grid.arrange(p_pseudotime_bl19_Wt1,
p_pseudotime_bl19_Bnc1,
p_pseudotime_bl19_Myrf,
p_pseudotime_bl19_Upk3b,
nrow = 2)
# select genes
genes_selected_10 <- c("ENSMUSG00000016458",
"ENSMUSG00000051910",
"ENSMUSG00000025105",
"ENSMUSG00000031965",
"ENSMUSG00000036098",
"ENSMUSG00000045680",
"ENSMUSG00000026628",
"ENSMUSG00000038193",
"ENSMUSG00000005836",
"ENSMUSG00000032419")
cells_uninfected <-
pData(cell_dataset) %>% mutate(cell = rownames(.)) %>%
filter(category %in% c("BL7")) %>%
.$cell
# Normalize
expr_readcount_norm <- expr_readcount_use
expr_readcount_norm <-
expr_readcount_norm[, Matrix::colSums(expr_readcount_norm > 0) >= 200]
expr_readcount_norm <-
expr_readcount_norm[Matrix::rowSums(expr_readcount_norm > 0) >= 30, ]
expr_readcount_norm <-
expr_readcount_norm[Matrix::rowSums(expr_readcount_norm) >= 60, ]
expr_readcount_norm@x <- median(Matrix::colSums(expr_readcount_norm)) *
(expr_readcount_norm@x / rep.int(Matrix::colSums(expr_readcount_norm),
diff(expr_readcount_norm@p)))
summary_enrichment_genes_selected <-
do.call(rbind.data.frame,
lapply(levels(pData(cell_dataset)$State), function(x) {
cat("Computing cell state", LETTERS[as.integer(x)], "\n")
cells_in_selected_group <-
rownames(pData(cell_dataset)[pData(cell_dataset)$State == x &
pData(cell_dataset)$category %in% c("BL19"),])
y <- analyze_gene_enrichment(expr = expr_readcount_norm,
cell_group_a = cells_in_selected_group,
cell_group_b = cells_uninfected,
gene_symbols = gene_symbols[rownames(expr_readcount_norm)],
expr_cpm = expr_cpm_use)[genes_selected_10, ]
y$category <- x
return(y)
})) %>%
mutate(p.adj2 = ifelse(-log10(p.adj) == Inf, 400, -log10(p.adj)))
## Computing cell state A
## Computing cell state B
## Computing cell state C
## Computing cell state D
## Computing cell state E
## Computing cell state F
## Computing cell state G
## Computing cell state H
## Computing cell state I
facet_labels <- paste("State",
LETTERS[as.integer(unique(summary_enrichment_genes_selected$category))])
names(facet_labels) <- unique(summary_enrichment_genes_selected$category)
# plot
ggplot() +
geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_point(data = summary_enrichment_genes_selected,
aes(pos.frac.b,
pos.frac.a,
size = p.adj2,
color = log2.effect),
alpha = .8,
stroke = 0, shape = 16) +
facet_wrap(~ category,
nrow = 3,
labeller = labeller(category = facet_labels)) +
coord_fixed() +
scale_x_continuous(name = "Expr (%, uninfected cells)",
limits = c(0, 1), breaks = seq(0, 1, .2)) +
scale_y_continuous(name = "Expr (%, indicated state)",
limits = c(0, 1), breaks = seq(0, 1, .2)) +
scale_color_viridis_c(name = expression(paste("Log"[2], " effect"))) +
theme_bw() +
labs(size = expression(paste("-log"[10], " (p-value)"))) +
theme(axis.text = element_text(family = "Arial", size = 7),
axis.title = element_text(family = "Arial", size = 8),
strip.text.x = element_text(family = "Arial", size = 8),
legend.title = element_text(family = "Arial", size = 8),
legend.text = element_text(family = "Arial", size = 6),
legend.position = "bottom",
legend.box = "horizontal",
legend.key.size = unit(2.5, "mm"),
legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
guides(color = guide_colorbar(order = 1),
size = guide_legend(order = 2)) +
geom_text_repel(data = summary_enrichment_genes_selected,
aes(pos.frac.b,
pos.frac.a,
label = summary_enrichment_genes_selected$symbol),
size = 2.5,
family = "Arial",
box.padding = .2,
point.padding = .2,
nudge_y = .15,
arrow = arrow(length = unit(.02, 'npc')),
segment.color = "grey35",
color = "black")
# get cell groups
cells_reprogr <-
pData(cell_dataset) %>% mutate(cell = rownames(.)) %>%
filter(! category %in% c("BL5", "BL6", "BL7")) %>%
.$cell
cells_bl19 <-
pData(cell_dataset) %>% mutate(cell = rownames(.)) %>%
filter(category %in% c("BL19")) %>%
.$cell
cells_trajectory_ordered <-
pData(cell_dataset) %>%
mutate(cell = rownames(.)) %>%
arrange(Pseudotime) %>% .$cell
cell_states_selected <- c(1, 2, 4, 6, 8)
cells_trajectory_ordered_use <-
pData(cell_dataset) %>%
mutate(cell = rownames(.)) %>%
filter(State %in% cell_states_selected & ! category %in% c("BL5",
"BL6",
"BL7")) %>%
arrange(Pseudotime) %>% .$cell
cells_trajectory_ordered_lst_use <-
lapply(cell_states_selected, function(x) {
cells_in_selected_group <-
rownames(pData(cell_dataset))[
pData(cell_dataset)$State == x]
cells_trajectory_ordered_use[
cells_trajectory_ordered_use %in% cells_in_selected_group
]
})
names(cells_trajectory_ordered_lst_use) <- cell_states_selected
# define function to merge cells
make_cell_groups <- function(cells,
number_of_members) {
if (length(cells) %% number_of_members == 0) {
mat <- matrix(cells, nrow = number_of_members)
mat_lst <- lapply(as.list(as.data.frame(mat)), as.character)
} else {
cells_remainder <- cells[(length(cells) -
(length(cells) %% number_of_members) + 1) :
length(cells)]
cells_selected <- cells[! cells %in% cells_remainder]
mat <- matrix(cells_selected, nrow = number_of_members)
mat_lst <- lapply(as.list(as.data.frame(mat)), as.character)
mat_lst[[length(mat_lst)]] <- c(mat_lst[[length(mat_lst)]],
cells_remainder)
}
return(mat_lst)
}
# set number of cells to merge
number_of_members <- 20
expr_cpm_avg_cells_trajectory_ordered_lst_use <-
lapply(cells_trajectory_ordered_lst_use, function(x) {
cells_lst <- make_cell_groups(x, number_of_members)
sapply(cells_lst, function(y) {
Matrix::rowMeans(expr_cpm_use[, y])
})
})
num_meta_cells <- lapply(expr_cpm_avg_cells_trajectory_ordered_lst_use,
function(x) {dim(x)[2]})
expr_cpm_avg_pseudo_meta_cells <-
do.call(cbind.data.frame, expr_cpm_avg_cells_trajectory_ordered_lst_use)
colnames(expr_cpm_avg_pseudo_meta_cells) <-
paste("reprogr", rep(letters[cell_states_selected],
num_meta_cells),
seq_len(ncol(expr_cpm_avg_pseudo_meta_cells)), sep = ".")
kmeans_mat_use <-
log10(expr_cpm_avg_pseudo_meta_cells[
str_replace(rownames(readRDS(file.path(data_directory,
"de_paired_primary_epicardial_uninfected.rds"))),
"_.+$", "")
, ] + 1)
# select genes to use
pos_frac <- .2
genes_kmeans_use <- rownames(kmeans_mat_use)[rowMeans(kmeans_mat_use > 0)
>= pos_frac]
kmeans_mat_use <- kmeans_mat_use[genes_kmeans_use,]
kmeans_mat_scaled_use <- t(scale(t(kmeans_mat_use)))
num_centers <- 6
seed_kmeans <- 20180706
set.seed(seed_kmeans)
kmeans_out <- kmeans(kmeans_mat_scaled_use,
num_centers,
iter.max = 10,
nstart = 20)
x_breaks <- quantile(seq_len(dim(kmeans_out$centers)[2]),
probs = seq(0, 1, 1))
# rename k-means clusters
facet_labels <- paste("Signature", seq_len(num_centers))
names(facet_labels) <- c(6,
1,
3,
5,
2,
4)
num_genes <- as.data.frame(table(kmeans_out$cluster))
names(num_genes) <- c("cluster", "count")
num_genes$x <- quantile(seq_len(dim(kmeans_out$centers)[2]), .5)
num_genes$y <- 1
num_genes$count2 <- paste("n =", num_genes$count)
sum(num_genes$count)
## [1] 2040
# plot
kmeans_out$centers %>% t %>% as.data.frame %>%
mutate(meta.cell = rownames(.),
x = seq_len(nrow(.))) %>%
gather(key = cluster,
value = expr,
- c(meta.cell, x)) %>%
mutate(cluster = factor(cluster,
levels = c(6,
1,
3,
5,
2,
4))) %>%
ggplot(aes(x, expr)) + geom_line(size = .2) +
facet_wrap(~ cluster, nrow = 2,
labeller = labeller(cluster = facet_labels)) +
labs(title = "Reprogrammed cells",
y = "Scaled expr (z-score)") +
scale_x_continuous(name = "Scaled pseudotime (state ABDFH)",
breaks = x_breaks) +
theme_bw() +
theme(axis.title = element_text(family = "Arial",
size = 8),
axis.text = element_text(family = "Arial", size = 7),
axis.text.x = element_text(family = "Arial",
angle = 90,
vjust = .5,
hjust = 1,
size = 7),
strip.text.x = element_text(family = "Arial",size = 7),
plot.title = element_text(family = "Arial",
size = 8,
# margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm"))
hjust = .5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
geom_text(data = num_genes,
aes(x, y, label = count2),
family = "Arial",
vjust = -.5,
size = 2.5)
cells_primary <-
pData(cell_dataset) %>% mutate(cell = rownames(.)) %>%
filter(category %in% c("BL5", "BL6")) %>%
.$cell
cells_primary_pseudotime_ordered <-
cells_trajectory_ordered[cells_trajectory_ordered %in% cells_primary]
cells_primary_pseudotime_ordered_lst <-
make_cell_groups(cells = cells_primary_pseudotime_ordered,
number_of_members = 20)
expr_cpm_avg_cells_pseudotime_ordered_primary <-
sapply(cells_primary_pseudotime_ordered_lst, function(y) {
Matrix::rowMeans(expr_cpm_use[, y])})
colnames(expr_cpm_avg_cells_pseudotime_ordered_primary) <-
c("primary.1", "primary.2", "primary.3")
expr_cpm_avg_pseudo_meta_cells_reprogr_primary <-
cbind(expr_cpm_avg_pseudo_meta_cells,
expr_cpm_avg_cells_pseudotime_ordered_primary)
heatmap_mat_use <- expr_cpm_avg_pseudo_meta_cells_reprogr_primary
heatmap_mat_scaled_use <- t(scale(t(heatmap_mat_use)))
heatmap_mat_scaled_use <- heatmap_mat_scaled_use[genes_kmeans_use,]
column_annotation_complexheatmap_1 <-
data.frame(State = rep(paste("State",
LETTERS[cell_states_selected]),
num_meta_cells))
rownames(column_annotation_complexheatmap_1) <-
seq_len(nrow(column_annotation_complexheatmap_1))
ha1 <- HeatmapAnnotation(df = column_annotation_complexheatmap_1,
col = list(State = c(setNames(c("black",
"grey70",
"grey70",
"grey70",
"black"),
paste("State", LETTERS[cell_states_selected])))),
annotation_legend_param = list(State = list(title = "State",
title_gp = gpar(fontsize = 8),
labels_gp = gpar(fontsize = 7))))
column_annotation_complexheatmap_2 <-
data.frame(Cell = rep("Epicardial", 3))
ha2 <- HeatmapAnnotation(df = column_annotation_complexheatmap_2,
col = list(Cell = c("Epicardial" = "#56B4E9")),
annotation_legend_param = list(Cell = list(title = "Cell",
title_gp = gpar(fontsize = 8),
labels_gp = gpar(fontsize = 7))))
genes_cluster_dend_ordered_lst <-
sapply(c(6,
1,
3,
5,
2,
4), function (x) {
genes_in_cluster <- names(kmeans_out$cluster)[kmeans_out$cluster == x]
dend <- hclust(dist(heatmap_mat_scaled_use[genes_in_cluster,]))
dend$labels
})
genes_cluster_dend_ordered <-
unlist(genes_cluster_dend_ordered_lst)
heatmap_mat_scaled_use <- heatmap_mat_scaled_use[genes_cluster_dend_ordered,]
# set row annotation
row_annotation_complexheatmap <-
data.frame(Pattern = rep(1:6, lapply(genes_cluster_dend_ordered_lst,
length)))
ha_row <- rowAnnotation(
df = row_annotation_complexheatmap,
col = list(Pattern = c(setNames(RColorBrewer::brewer.pal(n = 11,
name = "Set3")[1:6],
1:6))),
width = unit(1, "mm"),
annotation_legend_param = list(
Pattern = list(title = "Pattern",
title_gp = gpar(fontsize = 8),
labels_gp = gpar(fontsize = 7))))
# get heatmap 1
hm1 <- Heatmap(heatmap_mat_scaled_use[, 1:485],
cluster_columns = FALSE,
cluster_rows = FALSE,
show_row_dend = FALSE,
show_column_names = FALSE,
show_row_names = FALSE,
col = circlize::colorRamp2(c(-2, 0, 2),
c("#4575B4", "white", "#D73027")),
heatmap_legend_param =list(title = "Expr",
title_gp = gpar(fontfamily = "Arial",
# fontsize = 8),
fontsize = 6),
legend_direction = "vertical",
labels_gp = gpar(family = "Arial",
# fontsize = 7),
fontsize = 5),
# legend_width = unit(2, "mm"),
legend_height = unit(5, "mm")),
top_annotation = ha1,
top_annotation_height = unit(1, "mm"),
width = 2)
# get heatmap 2
hm2 <- Heatmap(heatmap_mat_scaled_use[, 486:488],
cluster_columns = FALSE,
cluster_rows = FALSE,
show_row_dend = FALSE,
show_column_names = FALSE,
show_row_names = FALSE,
col = circlize::colorRamp2(c(-2, 0, 2),
c("#4575B4", "white", "#D73027")),
heatmap_legend_param = list(title = "Expr",
title_gp = gpar(fontfamily = "Arial",
fontsize = 8),
labels_gp = gpar(family = "Arial",
fontsize = 7),
legend_width = unit(2, "mm"),
legend_height = unit(2, "mm")),
show_heatmap_legend = FALSE,
top_annotation = ha2,
top_annotation_height = unit(1, "mm"),
width = .02)
# select genes to mark
genes_to_mark <- unique(c(genes_selected_10,
"ENSMUSG00000015627",
"ENSMUSG00000031517"))
heatmap_mat_scaled_subset_indices <-
which(rownames(heatmap_mat_scaled_use) %in% genes_to_mark)
heatmap_mat_scaled_subset_indices_labels <-
gene_symbols[rownames(heatmap_mat_scaled_use)][heatmap_mat_scaled_subset_indices]
marked_genes <-
rowAnnotation(link = row_anno_link(at = heatmap_mat_scaled_subset_indices,
labels = heatmap_mat_scaled_subset_indices_labels,
side = "right",
labels_gp = gpar(fontfamily = "Arial",
fontsize = 5),
lines_gp = gpar(col = "grey50",
lwd = .5),
padding = 1),
width = unit(2.0, "mm") +
max_text_width(heatmap_mat_scaled_subset_indices_labels,
gp = gpar(fontsize = 6)))
# plot
draw(ha_row + hm1 + hm2 + marked_genes,
gap = unit(c(1, 1, 1), "mm"))
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin18.2.0 (64-bit)
## Running under: macOS Mojave 10.14.3
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.5/lib/libopenblas_haswellp-r0.3.5.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid splines stats4 parallel stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] ggrepel_0.8.0 extrafont_0.17 gridExtra_2.3
## [4] scales_1.0.0.9000 magrittr_1.5.0.9000 forcats_0.4.0.9000
## [7] stringr_1.4.0.9000 dplyr_0.8.0.9006 purrr_0.3.1
## [10] readr_1.3.1.9000 tidyr_0.8.3.9000 tibble_2.0.1.9001
## [13] tidyverse_1.2.1.9000 ComplexHeatmap_1.20.0 igraph_1.2.4
## [16] monocle_2.10.1 DDRTree_0.1.5 irlba_2.3.3
## [19] VGAM_1.1-1 ggplot2_3.1.0.9000 Biobase_2.42.0
## [22] BiocGenerics_0.28.0 Matrix_1.2-15
##
## loaded via a namespace (and not attached):
## [1] matrixStats_0.54.0 fs_1.2.6.9000 lubridate_1.7.4.9000
## [4] RColorBrewer_1.1-2 httr_1.4.0.9000 docopt_0.6.1
## [7] tools_3.5.2 backports_1.1.3 R6_2.4.0
## [10] DBI_1.0.0 lazyeval_0.2.1 colorspace_1.4-0
## [13] GetoptLong_0.1.7 withr_2.1.2 tidyselect_0.2.5
## [16] extrafontdb_1.0 compiler_3.5.2 cli_1.0.1
## [19] rvest_0.3.2 xml2_1.2.0 labeling_0.3
## [22] slam_0.1-45 digest_0.6.18 rmarkdown_1.11
## [25] sparsesvd_0.1-4 pkgconfig_2.0.2 htmltools_0.3.6
## [28] dbplyr_1.3.0 limma_3.38.3 rlang_0.3.1.9000
## [31] GlobalOptions_0.1.0 readxl_1.3.0.9000 rstudioapi_0.9.0
## [34] FNN_1.1.3 shape_1.4.4 generics_0.0.2
## [37] combinat_0.0-8 jsonlite_1.6 Rcpp_1.0.0
## [40] munsell_0.5.0 viridis_0.5.1 stringi_1.3.1
## [43] yaml_2.2.0 Rtsne_0.15 plyr_1.8.4
## [46] crayon_1.3.4 lattice_0.20-38 haven_2.1.0
## [49] circlize_0.4.5 hms_0.4.2.9001 zeallot_0.1.0
## [52] knitr_1.21 pillar_1.3.1.9000 rjson_0.2.20
## [55] reshape2_1.4.3 reprex_0.2.1.9000 glue_1.3.0.9000
## [58] evaluate_0.13 modelr_0.1.4 vctrs_0.1.0.9002
## [61] Rttf2pt1_1.3.7 cellranger_1.1.0 gtable_0.2.0
## [64] RANN_2.6.1 assertthat_0.2.0 xfun_0.5
## [67] broom_0.5.1.9000 qlcMatrix_0.9.7 HSMMSingleCell_1.2.0
## [70] viridisLite_0.3.0 pheatmap_1.0.12 cluster_2.0.7-1
## [73] fastICA_1.2-1 densityClust_0.3